# ------------------------------------------------------------------------------#
# Description: Create Figure 4a and 4b: Peer effects by visibility and timing
# Project:     Peer Effects in Voluntary Environmental Policies:
#              An Application to Urban Water Quality
# Author: Daniel A. Brent | dab320@psu.edu
# Created: 2025-06-11
# ------------------------------------------------------------------------------#

# Clear environment ------------------------------------------------------------
rm(list = ls())
gc()

# Load required packages -------------------------------------------------------
library(pacman)
p_load(data.table, fixest, ggplot2)

options(scipen = 999)

# Load and merge base data -----------------------------------------------------
load("data/adoptions/adoptions.RData")
load("data/bucket/peer_adopt_buckets_any.RData")
dat <- merge(adopt, peer.adopt.bucket.any, by = c("pin", "year"))
rm(adopt, peer.adopt.bucket.any)

load("data/bucket/peer_adopt_buckets_rg.RData")
dat <- merge(dat, peer.adopt.bucket.rg, by = c("pin", "year"))
rm(peer.adopt.bucket.rg)

load("data/bucket/peer_adopt_buckets_any_rg.RData")
dat <- merge(dat, peer.adopt.bucket.any.rg, by = c("pin", "year"))
rm(peer.adopt.bucket.any.rg)

load("data/bucket/peer_adopt_buckets_cs.RData")
dat <- merge(dat, peer.adopt.bucket.cs, by = c("pin", "year"))
rm(peer.adopt.bucket.cs)

load("data/bucket/peer_adopt_buckets_both.RData")
dat <- merge(dat, peer.adopt.bucket.both, by = c("pin", "year"))
rm(peer.adopt.bucket.both)

load("data/bucket/peer_elig_buckets_cs.RData")
dat <- merge(dat, peer.buckets.cs, by = c("pin", "year"))
rm(peer.buckets.cs)

load("data/bucket/peer_elig_buckets_rg.RData")
dat <- merge(dat, peer.buckets.rg, by = c("pin", "year"))
rm(peer.buckets.rg)

load("data/bucket/total_peers.RData")
dat <- merge(dat, total.peers, by = "pin")
rm(total.peers)

load("data/parcel_build/parcel_build.RData")
dat <- merge(dat, parcel.build[, .(pin, zip5, sub.area, sqft, lot, beds, baths,
                                   yearbuilt, val.land, val.improve,
                                   ren.pre90, ren.90.99, ren.00.09, ren.10,
                                   water.prob)], by = "pin")
rm(parcel.build)

load("data/census/all_parcels_census_2010.RData")
parcels.2010[, blk := as.numeric(paste0(block, blkgrp))]
dat <- merge(dat, parcels.2010[, .(pin, tract, blkgrp, block, blk)])
rm(parcels.2010)

# Scale peer adoption variables ------------------------------------------------
dat[, peer.adopt.any.100.scale    := peer.adopt.any.100 / 100]
dat[, peer.adopt.any.rg.100.scale := peer.adopt.any.rg.100 / 100]
dat[, peer.adopt.rg.100.scale     := peer.adopt.rg.100 / 100]
dat[, peer.adopt.cs.100.scale     := peer.adopt.cs.100 / 100]
dat[, peer.adopt.both.100.scale   := peer.adopt.both.100 / 100]

dat <- dat[year > 2010]
dat[, elig.peers.avg.100 := elig.peers.cs.100 + elig.peers.rg.100]

# Figure 4a: Peer effects varying peer group size ------------------------------

for (i in c(100, 80, 60, 40, 20)) {
  dat[, paste0("peer.adopt.any.", i, ".scale") := get(paste0("peer.adopt.any.", i)) / 100]
  dat[, paste0("elig.peers.avg.", i) := (get(paste0("elig.peers.cs.", i)) + get(paste0("elig.peers.rg.", i))) / 2]
  
  assign(paste0("m", i),
         felm(adopt.any ~ get(paste0("peers.", i)) | blkgrp + year |
                (get(paste0("peer.adopt.any.", i, ".scale")) ~ get(paste0("elig.peers.avg.", i))) |
                blkgrp,
              data = dat[elig.cs == 1 & post.rainwise == 0])
  )
}

load("data/iv/iv_diag_numpeers.RData")

coef.plot <- data.table(
  num = c(seq(20,100,by=20)),
  beta = c(m20$coefficients[2],
           m40$coefficients[2],
           m60$coefficients[2],
           m80$coefficients[2],
           m100$coefficients[2]),
  ci_low = c(iv_diag_numpeers[['m.any.20']]$AR$ci[1],
             iv_diag_numpeers[['m.any.40']]$AR$ci[1],
             iv_diag_numpeers[['m.any.60']]$AR$ci[1],
             iv_diag_numpeers[['m.any.80']]$AR$ci[1],
             iv_diag_numpeers[['m.any.100']]$AR$ci[1]),
  ci_hi = c(iv_diag_numpeers[['m.any.20']]$AR$ci[2],
            iv_diag_numpeers[['m.any.40']]$AR$ci[2],
            iv_diag_numpeers[['m.any.60']]$AR$ci[2],
            iv_diag_numpeers[['m.any.80']]$AR$ci[2],
            iv_diag_numpeers[['m.any.100']]$AR$ci[2])
)

gg <- ggplot(coef.plot, aes(x = num, y = beta)) + 
  geom_point(size = 6, color = 'dodgerblue') +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_hi), linewidth = 2, width = 3, color = 'dodgerblue') + 
  xlab("Size of Peer Group") + 
  ylab("Effect on Adoption (percentage points)") +
  scale_x_continuous(limits = c(18, 102), breaks = seq(20, 100, 20)) +
  scale_y_continuous(limits = c(0, 2), breaks = seq(0, 2, 0.25)) +  
  theme_bw(base_size = 30)

print(gg)

#Save
ggsave("output/figures/figure_4a.png", gg, width = 150, height = 150, units = "mm", dpi = 150, scale = 2)

# Figure 4b: Timing in peer adoption ------------------------------------------

load("data/time/peer_adopt_t_any.RData")
dat <- merge(dat, peer.adopt.t.any, by = c("pin", "year"))
rm(peer.adopt.t.any)

load("data/time/peer_adopt_t_any_rg.RData")
dat <- merge(dat, peer.adopt.t.any.rg, by = c("pin", "year"))
rm(peer.adopt.t.any.rg)

load("data/time/peer_adopt_t_cs.RData")
dat <- merge(dat, peer.adopt.t.cs, by = c("pin", "year"))
rm(peer.adopt.t.cs)

load("data/time/peer_adopt_t_both.RData")
dat <- merge(dat, peer.adopt.t.both, by = c("pin", "year"))
rm(peer.adopt.t.both)

for (i in 1:5) {
  dat[, paste0("peer.adopt.any.", i, ".scale") := get(paste0("peer.adopt.any.", i)) / 100]
  
  assign(paste0("m", i),
         felm(adopt.any ~ peers.100 | blkgrp + year |
                (get(paste0("peer.adopt.any.", i, ".scale")) ~ elig.peers.avg.100) |
                blkgrp,
              data = dat[elig.cs == 1 & post.rainwise == 0])
  )
}

load("data/iv/iv_diag_timing.RData")

coef.plot <- data.table(
  num = c(seq(1,5,by=1)),
  beta = c(m1$coefficients[2],
           m2$coefficients[2],
           m3$coefficients[2],
           m4$coefficients[2],
           m5$coefficients[2]),
  ci_low = c(iv_diag_timing[['m.any.1']]$AR$ci[1],
             iv_diag_timing[['m.any.2']]$AR$ci[1],
             iv_diag_timing[['m.any.3']]$AR$ci[1],
             iv_diag_timing[['m.any.4']]$AR$ci[1],
             iv_diag_timing[['m.any.5']]$AR$ci[1]),
  ci_hi = c(iv_diag_timing[['m.any.1']]$AR$ci[2],
            iv_diag_timing[['m.any.2']]$AR$ci[2],
            iv_diag_timing[['m.any.3']]$AR$ci[2],
            iv_diag_timing[['m.any.4']]$AR$ci[2],
            iv_diag_timing[['m.any.5']]$AR$ci[2])
)

gg <- ggplot(coef.plot, aes(x = num, y = beta)) + 
  geom_point(size = 6, color = 'dodgerblue') +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_hi), linewidth = 2, width = 0.2, color = 'dodgerblue') + 
  xlab("Years since peer adoption") + 
  ylab("Effect on Adoption (percentage points)") +
  theme_bw(base_size = 30)

print(gg)

#Save
ggsave("output/figures/figure_4b.png", gg, width = 150, height = 150, units = "mm", dpi = 150, scale = 2)
